There are mainly the following sections included in the codes: generating box plots, cluster dendrogram, normalization and subsequent dendrograms, designing model and contrast matrix, fitting the model, and volcano plots. More details referred to the appendix.
## Adjusting for optical effect............Done.
## Computing affinities.Done.
## Adjusting for non-specific binding............Done.
## Normalizing
## Calculating Expression
## [1] "\nHere comes the summary of results:"
## logFC AveExpr t P.Value
## Min. :-9.19178 Min. : 2.279 Min. :-39.77095 Min. :0.0000
## 1st Qu.:-0.05972 1st Qu.: 2.281 1st Qu.: -0.70703 1st Qu.:0.1522
## Median : 0.00000 Median : 2.480 Median : 0.00000 Median :0.5080
## Mean :-0.02355 Mean : 4.375 Mean : 0.07445 Mean :0.5345
## 3rd Qu.: 0.03970 3rd Qu.: 6.241 3rd Qu.: 0.67369 3rd Qu.:1.0000
## Max. : 8.66974 Max. :15.542 Max. :295.37719 Max. :1.0000
## adj.P.Val B getsymbols
## Min. :0.0000 Min. :-7.711 Length:54675
## 1st Qu.:0.6036 1st Qu.:-7.711 Class :character
## Median :1.0000 Median :-7.452 Mode :character
## Mean :0.7436 Mean :-6.583
## 3rd Qu.:1.0000 3rd Qu.:-6.498
## Max. :1.0000 Max. :21.296
##
## 1 2 3
## 54587 33 55
The three contrast are huvec/choroid, huvec/retina, huvec/iris.
Here come the plots before normalization:
## [1] "Here comes the MA plot for huvec/choroid:"
## [1] "Here comes the MA plot for huvec/retina:"
## [1] "Here comes the MA plot for huvec/iris:"
Here come the plots after normalization:
## [1] "Here comes the MA plot for huvec/choroid:"
## [1] "Here comes the MA plot for huvec/retina:"
## [1] "Here comes the MA plot for huvec/iris:"
Here comes the volcano plot for “huvec_retina”:
Here comes the volcano plot for “huvec_iris”:
In volcano plots, x-axis denotes how large the change/difference is, and y-axis for the significancy. The larger both of absolute x and y value are, the better results are. In other words, the points in red/green color with larger y-value than 5 is considered the significantly differentially expressed genes. For example, HOXB7, HOXA5, JL1RL1, SOCS2, CONK for pair huvec_choroid, HOXB7, HOXA5, JL1RL1, GBGT1, DHH, GBGT1 etc for pair huvec_retina, and HOXB7, HOXA5, JL1RL1, HOXB6, SOCS2 for pair huvec_iris.
After comparing GO terms of HOXB7 human, HOXA5 human and HOXA5 chicken, it is found that there are common function 1 (DNA-binding transcription factor activity, RNA polymerase II-specific), function 2 (RNA polymerase II cis-regulatory region sequence-specific DNA binding), process 1 (anterior/posterior pattern specification) and process 2 (regulation of transcription by RNA polymerase II). Those are related more or less with DNA-binding and RNA polymerase which play exactly the key role during the process of expressing genes, such as regulation of transcriptions.
knitr::opts_chunk$set(eval=T,echo=F,
message = F, warning = F, error = F,
fig.width=7, fig.height=5, fig.align="center")
## prob 1
library(GEOquery)
library(simpleaffy)
library(RColorBrewer)
library(affyPLM)
library(limma)
library(hgu133plus2.db)
library(annotate)
library(ggplot2)
library(seriation)
library(plotly)
## install.packages("BiocManager")
## library(BiocManager)
## source("biocLite.R")
## BiocManager::install(version = "3.12")
## BiocManager::install(simpleaffy, type = "source", checkBuilt = TRUE)
## BiocManager::install("simpleaffy")
x = getGEOSuppFiles("GSE20986") ## download data files of GSE20986
untar("GSE20986_RAW.tar", exdir = "data") ## untar data file
cels = list.files("data/", pattern = "[gz]")
sapply(paste("data", cels, sep = "/"), gunzip) ## unzip data
## make data of matrix
phenodata = matrix(rep(list.files("data"), 2), ncol =2)
phenodata <- as.data.frame(phenodata)
colnames(phenodata) <- c("Name", "FileName")
phenodata$Targets <- c("iris",
"retina",
"retina",
"iris",
"retina",
"iris",
"choroid",
"choroid",
"choroid",
"huvec",
"huvec",
"huvec")
write.table(phenodata, "data/phenodata.txt", quote = F, sep = "\t", row.names = F)
## box plot
celfiles <- read.affy(covdesc = "phenodata.txt", path = "data")
boxplot(celfiles)
## colored box plot with target names
cols = brewer.pal(8, "Set1")
eset <- exprs(celfiles)
samples <- celfiles$Targets
#colnames(eset) ## array names
colnames(eset) <- samples
boxplot(celfiles, col = cols, las = 2)
## Cluster Dendrogram
distance <- dist(t(eset), method = "maximum")
clusters <- hclust(distance)
plot(clusters)
## Normalization
celfiles.gcrma = gcrma(celfiles)
par(mfrow=c(1,2))
boxplot(celfiles.gcrma, col = cols, las = 2, main = "Post-Normalization");
boxplot(celfiles, col = cols, las = 2, main = "Pre-Normalization")
#dev.off()
distance <- dist(t(exprs(celfiles.gcrma)), method = "maximum")
clusters <- hclust(distance)
plot(clusters) ## Cluster Dendrogram after normalization
## Design model matrix
samples <- as.factor(samples)
design <- model.matrix(~0+samples)
#colnames(design)
colnames(design) <- c("choroid", "huvec", "iris", "retina")
#design
## Make contrast matrix
contrast.matrix = makeContrasts(
huvec_choroid = huvec - choroid,
huvec_retina = huvec - retina,
huvec_iris <- huvec - iris,
levels = design)
## Fit model
fit = lmFit(celfiles.gcrma, design)
huvec_fit <- contrasts.fit(fit, contrast.matrix)
huvec_ebay <- eBayes(huvec_fit)
##
probenames.list <- rownames(topTable(huvec_ebay, number = 100000))
getsymbols <- getSYMBOL(probenames.list, "hgu133plus2")
results <- topTable(huvec_ebay, number = 100000, coef = "huvec_choroid")
results <- cbind(results, getsymbols)
print("\nHere comes the summary of results:")
summary(results)
##
results$threshold <- "1"
a <- subset(results, adj.P.Val < 0.05 & logFC > 5)
results[rownames(a), "threshold"] <- "2"
b <- subset(results, adj.P.Val < 0.05 & logFC < -5)
results[rownames(b), "threshold"] <- "3"
table(results$threshold)
## Volcano plots
volcano <- ggplot(data = results,
aes(x = logFC, y = -1*log10(adj.P.Val),
colour = threshold,
label = getsymbols))
volcano <- volcano +
geom_point() +
scale_color_manual(values = c("black", "red", "green"),
labels = c("Not Significant", "Upregulated", "Downregulated"),
name = "Key/Legend")
volcano +
geom_text(data = subset(results, logFC > 5 & -1*log10(adj.P.Val) > 5), aes(x = logFC, y = -1*log10(adj.P.Val), colour = threshold, label = getsymbols) )
hm<-function(dis_seq,data){ ## heat-map function
set.seed(1234)
ord_seq<-get_order(seriate(dis_seq,method="HC"))
dist<-as.matrix(dis_seq)
names=colnames(dist)
plot_ly(x=names[ord_seq],y=names[ord_seq],z=dist[ord_seq,ord_seq],
type="heatmap",colors =colorRamp(c("yellow", "red"))) %>%
layout(title=paste("Heatmap",data),
xaxis=list(title=""),
yaxis=list(title="")) }
## Before normalization
da0<-cbind(rowSums(eset[,c(1,4,6)]),rowSums(eset[,c(2,3,5)]),
rowSums(eset[,c(7,8,9)]),rowSums(eset[,c(10,11,12)]))
colnames(da0)<-c('iris','retina','choroid','huvec')
#da0<-scale(da0)
print("Here comes the MA plot for huvec/choroid:")
y<-da0[,c(4,3)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/choroid")
print("Here comes the MA plot for huvec/retina:")
y<-da0[,c(4,2)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/retina")
print("Here comes the MA plot for huvec/iris:")
y<-da0[,c(4,1)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/iris")
distance0 <- dist(t(eset), method = "maximum")
clusters0 <- hclust(distance0)
plot(clusters0)
dis0=dist(t(da0), method = "maximum")
hm(dis0,"before Normalization")
## After normalization
norm<-exprs(celfiles.gcrma)
da1<-cbind(rowSums(norm[,c(1,4,6)]),rowSums(norm[,c(2,3,5)]),
rowSums(norm[,c(7,8,9)]),rowSums(norm[,c(10,11,12)]))
colnames(da1)<-c('iris','retina','choroid','huvec')
#da0<-scale(da0)
print("Here comes the MA plot for huvec/choroid:")
y<-da1[,c(4,3)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/choroid")
print("Here comes the MA plot for huvec/retina:")
y<-da1[,c(4,2)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/retina")
print("Here comes the MA plot for huvec/iris:")
y<-da1[,c(4,1)]
ma.plot(rowMeans(log2(y)), log2(y[, 1])-log2(y[, 2]), cex=1, main="huvec/iris")
distance1 <- dist(t(norm), method = "maximum")
clusters1 <- hclust(distance1)
plot(clusters1) ## Cluster Dendrogram after normalization
dis1=dist(t(da1), method = "maximum")
hm(dis1,"after Normalization")
vol.plot<-function(co){
results <- topTable(huvec_ebay, number = 100000, coef = co)
results <- cbind(results, getsymbols)
##
results$threshold <- "1"
a <- subset(results, adj.P.Val < 0.05 & logFC > 5)
results[rownames(a), "threshold"] <- "2"
b <- subset(results, adj.P.Val < 0.05 & logFC < -5)
results[rownames(b), "threshold"] <- "3"
table(results$threshold)
## Volcano plots
volcano <- ggplot(data = results,
aes(x = logFC, y = -1*log10(adj.P.Val),
colour = threshold,
label = getsymbols))
volcano <- volcano +
geom_point() +
scale_color_manual(values = c("black", "red", "green"),
labels = c("Not Significant", "Upregulated", "Downregulated"),
name = "Key/Legend")
volcano +
geom_text(data = subset(results, logFC > 5 & -1*log10(adj.P.Val) > 5), aes(x = logFC, y = -1*log10(adj.P.Val), colour = threshold, label = getsymbols) )
}
vol.plot("huvec_retina")
vol.plot("huvec_iris <- huvec - iris")